

* Disabled timestamp helpers, kept for reference:
*local date : di  %tdCY-N-D  daily("$S_DATE", "DMY")
*local hh = substr("$S_TIME",1,2)
*local mm = substr("$S_TIME",4,2)
*di "`hh'" "`mm'"

// Globals used throughout the script:
//   varmatch - key variables on which admin and survey records are matched
//   pop      - population threshold used to build the "large" indicator
global varmatch "year uf dtbirth gend race large" // metro  race school
global pop = 1000000

// Close any log left open by a previous run, then start this script's SMCL log
local name "B5_survey_prepare"
capture log close
log using "${pfile}/`name'", smcl replace

// *** // Informality rates based on 2010 Census // *** //
// Output: gtemp2010.dta with weighted means of unemp, inform, inform2 and
// inform3 by (year, uf, gend); year is fixed at 2010.

use "${pcenso}/censo2010cem.dta", clear
keep v0001 v0002 v0010 v0641 v0654 v0642 v0643 v0644 v6036 v6920 v6930 v1006 v0601

rename (v0001 v0002 v0641 v0654 v0601) (uf munibr work_1w empstatd gend)

generate year = 2010

// Employment status: 1 - Employed, 2 - Unemployed, 3 - Inactive
recode empstatd (1=2) (2=3)
replace empstatd = 1 if work_1w == 1 | v0642 == 1 | v0643 == 1 | v0644 == 1

// Unemployment indicator, defined only for employed (0) and unemployed (1)
generate unemp = (empstatd == 2) if inlist(empstatd, 1, 2)

keep if v6036 >= 10 // age >= 10

// inform  : informal job only; 0 for the other employed individuals (v6920 == 1)
// inform2 : informal job or self-employed
// inform3 : inform2 restricted to urban areas (v1006 == 1)
generate inform  = cond(v6930 == 3, 1, cond(v6920 == 1, 0, .))
generate inform2 = cond(inlist(v6930, 3, 4), 1, cond(v6920 == 1, 0, .))
generate inform3 = inform2 if v1006 == 1

// shift the gender code down by one
replace gend = gend - 1

keep uf year gend unemp inform inform2 inform3 v0010

collapse (mean) unemp inform inform2 inform3 [pw=v0010], by(year uf gend)

save gtemp2010, replace


// *** // Data on municipality population to merge with admin data // *** //
// Reads the "Séries" sheet, reshapes the year* columns to one row per
// municipality-year and saves pop200414.dta in the working directory.
import excel "${dbox}\Data\PNAD\pop200414.xls", sheet("Séries") firstrow clear
rename Codigo mun

// after the reshape, "year" holds the population value and "y" the calendar year
reshape long year, i(mun) j(y)
rename year pop
rename y year

// numeric municipality code with its last digit removed
// NOTE(review): presumably this drops a check digit so the code matches the admin data; confirm
destring mun, replace
replace mun = int(mun/10)

save "pop200414.dta", replace

// *** // prepare admin data // *** //
// Builds B2_mergeadmin.dta from B0_ui.dta: keeps the analysis sample, creates the
// RD support variables, aligns the year with the survey reference period and
// attaches employment (emp9) and municipality population (pop).
use "B0_ui.dta", clear

// cash uses the x that ships with B0_ui.dta; x is dropped and redefined further
// down as the tenure running variable. A missing x gives cash = 0.
// NOTE(review): confirm that the incoming x is the earnings-based variable the label describes.
g cash = (x<=0)
label var cash "Av. Earnings(t-1) < 2 m.w."
g ind_cons = (ind==4)
label var ind_cons "Constrution Sector"
g white = race==2 // computed before the race recodes below (they only touch 99 and -1)
label var white "White worker"

keep if year >=2004

// pool the 99 and -1 codes into category 9
replace race = 9 if race == 99
replace race = 9 if race == -1

keep if tpvinculo == 10
keep if causadesli == 11

// RD support vars: x is tenure centred at 24, k the indicator for x >= 0, kx their interaction
cap drop x
g x = tempempr - 24
replace x = . if tempempr > 22  & tempempr <= 24 // leave out tenure in (22, 24]
g k = (x>=0&x!=.)
g kx = k*x

// Only workers laid-off before the survey reference period
replace year = year+1 if dt_dism > mdy(10,1,year) // reference period in pnad oct-sept
drop if year == 2015 | year == 2004 | year == 2010
keep if dt_dism < mdy(9,17,year)

// w9 = 1 when dt_dism + 7*und falls on or before Sept 24 of the (shifted) year;
// a missing und yields w9 = 0
g w9 = (dt_dism+und*7) <= mdy(9,24,year)

merge m:1 year cpf using B5_emp9_cpf, nogen keep(1 3)
replace emp9 = 0 if emp9==. // unmatched workers are coded emp9 = 0

// (gend was listed twice in the original varlist; the duplicate is removed)
keep year uf mun cpf emp9 w9 remsept s3 und_5 und dtbirth gend race tempempr x k kx last_lay dt_dism ///
pred_s3 pred_und_5 pred_und remmedr remmedia school white hor age_dism cash ind_cons // emp_dur nbirth 

// Use the population file written by this script (saved as "pop200414.dta" in the
// working directory) instead of a copy under ${dbox}\Data\PNAD, which this script
// never writes and which could therefore be stale or absent.
// Unmatched municipality-years keep pop missing. _merge stays in the saved file.
merge m:1 mun year using "pop200414.dta", keep(1 3) keepus(pop)

save "B2_mergeadmin.dta", replace
*/



// *** // GENERATE PNAD DATA ON ALL UNEMPLOYMENT SPELLS FROM 2001 // *** //
// Loads the PNAD person file, labels the raw labour-market questions and
// derives the demographic and employment indicators used further down.

use "${ppnad}pnad200214pes.dta" , clear

// --- currently employed ---
label variable v9005 "employed- #jobs in the week"
label variable v9906 "employed- ocupation"
label variable v9029 "employed- job type" // 1 employee 2 domestic worker 3 self-employed 4 employer
label variable v9032 "employed-private public"
label variable v9042 "employed-formal"
label variable v9058 "employed- hours"
label variable v9611 "employed- tenure years"
label variable v9612 "employed- tenure months"
label variable v9062 "employed- left another job within last 12 months?"
label variable v9064 "employed- tenure previous job"  // months
label variable v9065 "employed- formal previous job"
label variable v9066 "employed- received ui after previous job"
label variable v9532 "employed- salary" // some badly imported values - too high

// --- not employed: information on the last job ---
label variable v9067 "unemployed- anyjob within last 12 months?"
label variable v9971 "unemployed- ocupation last job"
label variable v9077 "unemployed- job type last job" // 1 employee 2 domestic worker 3 self-employed 4 employer
label variable v9078 "unemployed-private public last job"
label variable v9083 "unemployed- formal last job"
label variable v9084 "unemployed- ui last job"
label variable v9085 "unemployed- social security last job"
label variable v9861 "unemployed- tenure last job years" // years component (multiplied by 12 in p_ten)
label variable v9862 "unemployed- tenure last job months" // months component

// --- job search ---
label variable v9115 "search week-1"
label variable v9116 "search month-1"
label variable v9117 "search month-2"
label variable v9118 "search month-3 month-15"
label variable v9119 "searching method"

// --- other ---
label variable v9126 "who answered the survey 2own 4-6other"
label variable v1272 "savings investments"

// create variables
generate gend = (v0302 == 4)

// v3032, v3031 and v3033 are passed to mdy() as month, day and year
generate dtbirth = mdy(v3032, v3031, v3033)

// NOTE(review): the recode sends code 0 to 9 and code 9 to 1; confirm this is the
// intended harmonisation with the race codes of the admin data.
generate race = v0404
recode race (9=1) (0=9)

// years of schooling: v4803, falling back on v4703 when v4803 is system-missing
generate schyrs = cond(v4803 == ., v4703, v4803)

// Note: agriculture workers are disregarded - (1) very few, 1,500 out of 50,000+ receiving UI & (2) different UI regime
generate emp          = (v9005 != .)
generate emp_form     = (emp == 1 & v9042 == 2)           // any employed worker with labor card
generate emp_informal = (emp == 1 & v9042 == 4)           // any employee (domestic or not) without labor card
generate emp_self     = (emp == 1 & v9029 == 3)           // self-employed
generate emp_employer = (emp == 1 & v9029 == 4)           // employer
generate emp_home     = (emp == 1 & inrange(v9029, 5, 7)) // v9029 == 7  not paid - home/family production
generate emp_priv     = (emp == 1 & v9032 == 2)
generate emp_hours    = v9058 if emp == 1

destring uf, replace

// informality rates for state-level analysis //
// Writes three files and leaves the person-level data in memory untouched:
//   pnad200414_ufyeargender : unemp and inform3 by year-state-gender (2004-2014), plus the census rows
//   pnad200414_uf           : state average of the yearly informality rate, 2004-2014
//   pnad201114_uf           : same, 2011-2014
preserve

// (1) year x state x gender, labour force only
generate lf = (emp == 1 | v9115 == 1) // labor force participation
keep if lf == 1
generate unemp = (emp == 0 & v9115 == 1) // 1 week definition
generate inform3 = emp_informal + emp_self if emp == 1
keep if inrange(v0101, 2004, 2014)
summarize lf emp unemp inform3
rename v0101 year
collapse (mean) unemp inform3 [pw=v4729], by(year uf gend)
append using gtemp2010.dta
save pnad200414_ufyeargender, replace

// (2) state level: weighted rate per year and state, then an unweighted mean over years
foreach first in 2004 2011 {
    restore, preserve
    keep if emp == 1
    generate inform3 = emp_informal + emp_self
    keep if inrange(v0101, `first', 2014)
    collapse (mean) inform3 [pw=v4729], by(v0101 uf)
    collapse (mean) inform3, by(uf)
    save pnad`first'14_uf, replace
}

restore
*/

// Earnings of the employed; values above 50,000 are left missing
// (v9532 has some badly imported, too-high values)
g w = v9532 if emp ==1 & v9532 <= 50000
g w_form = w if emp_form==1
// full variable name spelled out: the original "emp_inform" only resolved through
// variable abbreviation and fails under "set varabbrev off"
g w_informal = w if emp_informal==1

// left a job within the last 12 months (asked separately of employed and non-employed)
g emp365 = v9062==2 | v9067==1

// NOTE(review): unlike p_ten below, no 99 code is excluded from v9612 here; confirm
g ten = v9611*12 + v9612
label var ten "Tenure in months"

// characteristics of the previous job, combining the employed (v9065, v9066)
// and non-employed (v9083, v9084) versions of each question
g p_form = v9065 == 1 | v9083 == 1

g p_inform = v9065 == 3 | v9083 == 3

g p_ui = v9066 ==2 | v9084 == 2

// previous-job tenure in months; left missing when the months component is coded 99
g p_ten = v9861*12 + v9862 if v9862!=99
label var p_ten "prev tenure - only if unemployed"

// recovering upa data from household file
destring v0102 v0103, replace
merge m:1 v0101 v0102 v0103 using "${dbox}/Data/PNAD/pnad200214dom.dta", nogen keep(1 3) keepus(v4105 v4107 upa)

// popsurvey: weighted count of respondents in each (year, upa) cell.
// A tempfile replaces the original scratch file "temp.dta", so nothing in the
// working directory is overwritten or erased and no residue is left if the run stops.
tempfile upapop
preserve
g popsurvey = 1
collapse (sum) popsurvey [pw=v4729], by(v0101 upa)
save "`upapop'"
restore
merge m:1 v0101 upa using "`upapop'", keep(1 3) keepus(popsurvey) nogen

// flag agriculture workers (they are dropped later, when the matching samples are built)
g emp_agric = v9008!=.

g metropo = v4107 == 1 // identify 9 metropolitan regions

cap rename v0101 year

label var v4105 "Urban status or household"
label var v4107 "1 Metropolitan 2 Autorepresentative 3 Non-autorepresentative"

// Same variables as before, with full names instead of the abbreviations
// emp_inform and w_inform, and without the repeated gend and v4729 entries
keep year upa v4105 v4107 v4729 schyrs gend race dtbirth uf metropo emp emp365 p_form p_inform p_ui p_ten emp_form emp_informal emp_self emp_priv emp_home emp_employer emp_hours emp_agric w w_form w_informal ten v0404 v0402 v0405 v0501 popsurvey

// saving full sample before restricting to workers who left a job within the previous 365 days //
save B5_survey_all, replace

keep if emp365 == 1


save "B3_pnad_merge.dta", replace // sample who left a job in the past 365 days


// *** // MERGING // *** //
// (header rewritten with // instead of the line-continuation token used before)
// main survey sample - only workers who lost a formal job (p_form == 1)
use "B3_pnad_merge.dta", clear

rename v4729 wei
keep if v4105 <=3 // keep only households in urban  areas
drop if emp_agric == 1 // drop small share of agriculture workers (not in admin sample)

// large = 1 when the weighted upa population exceeds $pop.
// Missing compares as larger than any number, so without the guard a missing
// popsurvey would be coded large = 1; it is now left missing instead.
g large = popsurvey > $pop if !missing(popsurvey)

keep if p_form==1

// keep only respondents that are unique within their matching cell
cap drop co
bys $varmatch: g co = _N // p_inform splits placebo and actual sample
ta co
keep if co <= 1

compress

g id_survey = _n // identifies observations in the survey to cluster se in matched regression analysis

save "B4_pnad_merge_collapse.dta", replace

// placebo survey sample - only workers who lost an informal job (p_inform == 1)
use "B3_pnad_merge.dta", clear

keep if v4105 <=3 // keep only households in urban  areas
drop if emp_agric == 1 // drop small share of agriculture workers (not in admin sample)

// large = 1 when the weighted upa population exceeds $pop.
// Missing compares as larger than any number, so without the guard a missing
// popsurvey would be coded large = 1; it is now left missing instead.
g large = popsurvey > $pop if !missing(popsurvey)

keep if p_inform==1


// keep only respondents that are unique within their matching cell
cap drop co
bys $varmatch: g co = _N // p_inform splits placebo and actual sample
ta co
keep if co <= 1

compress

g id_survey = _n // identifies observations in the survey to cluster se in matched regression analysis

save "B4_pnad_merge_collapse_placebo.dta", replace


// call script preparing modal race
do "${dbox}/Brazil_Cash_on_new/Do_restat/B5_racemode.do"

// *** // MATCHED SAMPLE FOR GRAPHS // *** //
// Matches admin records to the main survey sample on $varmatch and keeps the
// matched admin observations in B6_surveyrais_matched.
use "B2_mergeadmin.dta", clear

// replace race by racedismbmodemax from B5_racemode whenever that value is available
merge m:1 cpf using B5_racemode, keep(1 3) nogen keepus(racedismbmodemax)
replace race = racedismbmodemax if race != racedismbmodemax & racedismbmodemax !=.
drop racedismbmodemax

// Same threshold as the survey side ($pop, replacing the hard-coded 1000000).
// Missing compares as larger than any number, so records without a population
// figure used to be coded large = 1. They are now excluded: they cannot be
// classified, and a missing key would otherwise match a missing key in the survey file.
g large = pop > $pop if !missing(pop)
drop if missing(large)

replace school = school / 100

// number of admin records sharing the same matching cell
bys $varmatch: g co = _N
label var co "Cluster Size"
replace co = 50 if co >=50
keep if co <=10

// full names for emp_informal and w_informal instead of abbreviations
merge m:1 $varmatch using "B4_pnad_merge_collapse.dta", nogen keep(1 3) keepus(emp_form emp_informal emp_self id_survey p_ui p_ten w w_form w_informal) // race school

g matched = emp_form!=. // emp_form is non-missing only for records found in the survey file
g age = (mdy(12,31,year)-dtbirth)/365
g emp_inf2 = emp_informal + emp_self

keep if matched == 1

save B6_surveyrais_matched, replace



// *** // Tenure distribution in RAIS - workers employed in the last week of september, comparable to PNAD // *** //
// For every year 2005-2014: keep spells hired on or before Sept 30 and not
// dismissed before Sept 23, draw a 1% random subsample and store it in a
// per-year file; the per-year files are then stacked into B6_ten and erased.
// NOTE(review): no seed is set in the visible code, so the draw is reproducible
// only if a seed is set before this script runs.
forvalues yr = 2005/2014 {
    use "${prais}/`yr'/2_`yr's.dta", clear
    keep year id cpf identificad dt_hir dt_dism tempempr remmedr dtb gend race
    // "year" below is the dataset variable, not the loop macro;
    // a missing dt_dism passes the >= test
    keep if dt_hir<=mdy(9,30,year) & dt_dism>=mdy(9,23,year)
    generate uni = uniform()
    keep if uni <= .01
    save B6_ten`yr', replace
}

// stack the yearly subsamples and remove the intermediate files
clear
forvalues yr = 2005/2014 {
    append using B6_ten`yr'
    erase B6_ten`yr'.dta
}

// NOTE(review): rescales tempempr by 7/30, presumably a unit conversion; confirm the units
replace tempempr = tempempr*7/30
save B6_ten, replace




capture log close